Code
library(tictoc)
library(ggplot2)
library(dplyr)
library(data.table)
names_functions = list.files(here::here("functions"))
for (f in names_functions)
source(here::here("functions", f))
rm(f, names_functions)This notebook loads the MCMC output from the script estimate_models.r and calculates
Note that the plots below show the results for the entire election campaign, i.e. up until election day. The results written to csv are filtered to only include win probabilities and mean expected vote shares until the last day for which polls are available in a given scenario.
library(tictoc)
library(ggplot2)
library(dplyr)
library(data.table)
names_functions = list.files(here::here("functions"))
for (f in names_functions)
source(here::here("functions", f))
rm(f, names_functions)parties <- load_parties()
states <- load_states()
regions <- load_regions()
states_regions <- load_dataland_states_regions()
n_parties_by_state <- load_n_parties_by_geography("state")
dates_campaign <- load_dates_election_campaign(year = 2024)
electoral_votes <- load_electoral_votes()
election_day <- load_election_day()Load priors -> visualize along with posterior distribution of \pi_T
priors <- readRDS(here::here("priors", "priors.Rds"))Define a function to generate csv output and plots for each scenario
gen_results <- function(plts, scen) {
print(scen)
out <- readRDS(here::here("model", paste0("mcmc_out_", scen, ".Rds")))
df_polls <- read.csv(here::here("data", paste0("df_polls_", scen, ".csv")))
df_polls$date <- as.Date(df_polls$date)
# make non-zero polls more visible in plots
df_polls$value = ifelse(df_polls$value == 0,
NA,
df_polls$value)
df_prior_ppi <- invert_alr_on_prior(priors[[scen]][["m_mmu_T"]])
# use data.table -> much, much quicker than tidyverse!
df_draws_ppi <- convert_draws_to_dt(
rstan::extract(out, pars = "ppi")[["ppi"]],
geographies = states,
parties = parties,
dates_campaign = dates_campaign)
df_draws_ppi_reg <- convert_draws_to_dt(
rstan::extract(out, pars = "ppi_reg")[["ppi_reg"]],
geographies = regions,
parties = parties,
dates_campaign = dates_campaign)
df_draws_ppi_nat <- convert_draws_to_dt(
rstan::extract(out, pars = "ppi_nat")[["ppi_nat"]],
geographies = "National",
parties = parties,
dates_campaign = dates_campaign)
tic("calc prob win election")
df_prob_win_election <- do.call(
"rbind",
lapply(
dates_campaign,
FUN = calc_prob_win_election,
df_draws_ppi = df_draws_ppi,
df_draws_ppi_nat = df_draws_ppi_nat,
states = states,
parties = parties,
electoral_votes = electoral_votes
)
)
toc()
tic("calc prob win states")
df_prob_win_states <- do.call(
"rbind",
lapply(
dates_campaign,
FUN = calc_prob_win_states,
df_draws_ppi = df_draws_ppi,
states = states,
parties = parties
)
)
toc()
# Export mean vote share and win probabilities to csv
tic("calc mean vote states")
df_draws_ppi %>%
group_by(t, party, geography) %>%
summarise(mean_vote_share = mean(values)) %>%
rename(date = t, province = geography) %>%
mutate(party = tolower(party)) %>%
tidyr::pivot_wider(
names_from = "party",
values_from = "mean_vote_share",
names_glue = "{party}_{.value}") -> df_out_mean
toc()
df_prob_win_states %>%
mutate(party = tolower(party)) %>%
rename(province = geography) %>%
tidyr::pivot_wider(
names_from = "party",
values_from = "prob_win",
names_glue = "{party}_{.value}") -> df_out_prob_win_states
write.csv(
merge(
# only export until day of latest available poll
filter(df_out_mean, date <= max(df_polls$date)),
filter(df_out_prob_win_states, date <= max(df_polls$date)),
by = c("date", "province")
),
file = here::here(paste0("provincial_forecast_", scen, ".csv")),
row.names = FALSE
)
tic("calc mean vote share national")
df_draws_ppi_nat %>%
select(-geography) %>%
group_by(t, party) %>%
summarise(mean_vote_share = mean(values)) %>%
rename(date = t) %>%
mutate(party = tolower(party)) %>%
tidyr::pivot_wider(
names_from = "party",
values_from = "mean_vote_share",
names_glue = "{party}_{.value}") -> df_out_mean_nat
toc()
tic("transform win prob election for export")
df_prob_win_election %>%
mutate(party = tolower(party)) %>%
tidyr::pivot_wider(
names_from = "party",
values_from = "prob_win",
names_glue = "{party}_{.value}") -> df_out_prob_win_election
toc()
write.csv(
merge(
# only export until day of latest available poll
filter(df_out_mean_nat, date <= max(df_polls$date)),
filter(df_out_prob_win_election, date <= max(df_polls$date)),
by = "date"
),
file = here::here(paste0("national_forecast_", scen, ".csv")),
row.names = FALSE
)
# Plots
plot_prob_win_election(
df_prob_win_election,
election_day,
scen) -> plts[[scen]][["plt_prob_win_election"]]
tic("plot win prob election over time")
plot_prob_win_election_over_time(
df_prob_win_election,
scen) -> plts[[scen]][["plt_prob_win_election_over_time"]]
toc()
plot_prob_win_states(
df_prob_win_states,
election_day,
scen) -> plts[[scen]][["plt_prob_win_states"]]
plot_prob_win_states_over_time(
df_prob_win_states,
scen) -> plts[[scen]][["plt_prob_win_states_over_time"]]
plot_ppiT(
df_draws_ppi,
df_prior_ppi,
election_day,
n_geographies = length(states),
plt_title_prefix = scen,
plt_caption = "Dotted vertical line: prior mean"
) -> plts[[scen]][["plt_ppiT"]]
plot_ppi(
df_draws_ppi,
filter(df_polls, scenario == scen),
n_geographies = length(states),
type_of_poll = "state",
plt_title_prefix = scen,
plt_caption = "Posterior mean: dashed line; lighter (darker) ribbon: 95 (83) percent posterior credible interval; dots indicate the vote share in a survey on that day."
) -> plts[[scen]][["plt_ppi"]]
plot_ppi(
df_draws_ppi_reg,
filter(df_polls, scenario == scen),
n_geographies = length(regions),
type_of_poll = "regional",
plt_title_prefix = scen,
plt_caption = "Posterior mean: dashed line; lighter (darker) ribbon: 95 (83) percent posterior credible interval; dots indicate the vote share in a survey on that day."
) -> plts[[scen]][["plt_ppi_reg"]]
plot_ppi(
df_draws_ppi_nat,
filter(df_polls, scenario == scen),
n_geographies = 1,
type_of_poll = "national",
plt_title_prefix = scen,
plt_caption = "Posterior mean: dashed line; lighter (darker) ribbon: 95 (83) percent posterior credible interval; dots indicate the vote share in a survey on that day."
) -> plts[[scen]][["plt_ppi_nat"]]
rm(df_draws_ppi,
df_draws_ppi_reg,
df_draws_ppi_nat,
df_polls,
df_prior_ppi,
df_prob_win_election,
df_prob_win_states,
df_out_prob_win_election,
df_out_prob_win_states,
df_out_mean,
df_out_mean_nat,
out)
plts
}Loop over scenarios
scenarios <- load_scenarios()
plts <- list()
for (scen in scenarios) {
plts <- gen_results(plts, scen)
}[1] "A"
calc prob win election: 269.95 sec elapsed
calc prob win states: 207.33 sec elapsed
calc mean vote states: 0.41 sec elapsed
calc mean vote share national: 0.08 sec elapsed
transform win prob election for export: 0.02 sec elapsed
plot win prob election over time: 0.01 sec elapsed
[1] "B"
calc prob win election: 395.33 sec elapsed
calc prob win states: 204.16 sec elapsed
calc mean vote states: 0.39 sec elapsed
calc mean vote share national: 0.08 sec elapsed
transform win prob election for export: 0.02 sec elapsed
plot win prob election over time: 0.03 sec elapsed
[1] "C"
calc prob win election: 276.39 sec elapsed
calc prob win states: 254.41 sec elapsed
calc mean vote states: 0.5 sec elapsed
calc mean vote share national: 0.1 sec elapsed
transform win prob election for export: 0.01 sec elapsed
plot win prob election over time: 0 sec elapsed
[1] "D"
calc prob win election: 287.99 sec elapsed
calc prob win states: 216.22 sec elapsed
calc mean vote states: 0.4 sec elapsed
calc mean vote share national: 0.06 sec elapsed
transform win prob election for export: 0.02 sec elapsed
plot win prob election over time: 0.02 sec elapsed
[1] "E"
calc prob win election: 235.64 sec elapsed
calc prob win states: 207.51 sec elapsed
calc mean vote states: 0.47 sec elapsed
calc mean vote share national: 0.06 sec elapsed
transform win prob election for export: 0.02 sec elapsed
plot win prob election over time: 0.02 sec elapsed